Code
library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)dfsub <- read.csv("../data/rawdata_participants.csv") |>
mutate(across(everything(), ~ifelse(.x == "", NA, .x)))
df <- read.csv("../data/rawdata_IllusionGame.csv") |>
group_by(Illusion_Type) |>
mutate(Illusion_Effect = ifelse(sign(Illusion_Strength) > 0, "Incongruent", "Congruent"),
Task_Difficulty = abs(Illusion_Difference),
Condition_Illusion = datawizard::categorize(
Illusion_Strength, split="quantile", n_groups=4,
labels=c("Congruent - Strong", "Congruent - Mild", "Incongruent - Mild", "Incongruent - Strong")),
Condition_Difficulty = datawizard::categorize(Task_Difficulty, split="quantile", n_groups=2, labels=c("Hard", "Easy"))) |>
ungroup()
# table(dfsub$Experimenter)The initial sample consisted of 768 participants (Mean age = 22.0, SD = 8.4, range: [16, 73]; Gender: 76.4% women, 20.8% men, 2.60% non-binary, 0.13% missing; Education: Bachelor, 19.79%; Doctorate, 1.69%; High School, 70.44%; Master, 2.73%; missing, 0.13%; Other, 5.21%), for a total trial number of 368640.
SD_per_dim <- function(x, dims="") {
m <- matrix(nrow=nrow(x), ncol=0)
for(s in dims) {
m <- cbind(m, sapply(as.data.frame(t(x[grepl(s, names(x))])), sd))
}
m
}ipip6 <- select(dfsub, starts_with("IPIP6_"), -IPIP6_Duration, -IPIP6_Order)
ipip6[grepl("_R", names(ipip6))] <- 1 - ipip6[grepl("_R", names(ipip6))]
dfsub$IPIP6_Extraversion <- rowMeans(ipip6[grepl("Extraversion", names(ipip6))])
dfsub$IPIP6_Conscientiousness <- rowMeans(ipip6[grepl("Conscientiousness", names(ipip6))])
dfsub$IPIP6_Neuroticism <- rowMeans(ipip6[grepl("Neuroticism", names(ipip6))])
dfsub$IPIP6_Openness <- rowMeans(ipip6[grepl("Openness", names(ipip6))])
dfsub$IPIP6_HonestyHumility <- rowMeans(ipip6[grepl("HonestyHumility", names(ipip6))])
dfsub$IPIP6_Agreeableness <- rowMeans(ipip6[grepl("Agreeableness", names(ipip6))])
dfsub$IPIP6_SD <- rowMeans(SD_per_dim(ipip6, c("Extraversion", "Conscientiousness", "Neuroticism",
"Openness", "HonestyHumility", "Agreeableness")))pid5 <- select(dfsub, starts_with("PID5_"), -PID5_Duration, -PID5_Order)
dfsub$PID5_Disinhibition <- rowMeans(pid5[grepl("Disinhibition", names(pid5))])
dfsub$PID5_Detachment <- rowMeans(pid5[grepl("Detachment", names(pid5))])
dfsub$PID5_NegativeAffect <- rowMeans(pid5[grepl("NegativeAffect", names(pid5))])
dfsub$PID5_Antagonism <- rowMeans(pid5[grepl("Antagonism", names(pid5))])
dfsub$PID5_Psychoticism <- rowMeans(pid5[grepl("Psychoticism", names(pid5))])
dfsub$PID5_SD <- rowMeans(SD_per_dim(pid5, c("Disinhibition", "Detachment", "NegativeAffect",
"Antagonism", "Psychoticism")))sss <- select(dfsub, starts_with("SSS_"), -SSS_Duration, -SSS_Order)
dfsub$SSS_General <- rowMeans(sss / 4)
dfsub$SSS_SD <- rowMeans(SD_per_dim(sss, c("SSS")))From Maertens et al. (2023): “We recommend to calculate and report all five scores of the Verification done framework”:
mist <- select(dfsub, starts_with("MIST_"), -MIST_Duration, -MIST_Order)
dfsub$MIST_RealDetection <- rowSums(mist[grepl("Real", names(mist))])
dfsub$MIST_FakeDetection <- rowSums(mist[grepl("Fake", names(mist))] == 0)
dfsub$MIST_Discernment <- dfsub$MIST_RealDetection + dfsub$MIST_FakeDetection
dfsub$MIST_Distrust <- rowSums(mist == 0) - 8
dfsub$MIST_Naivete <- rowSums(mist) - 8# Consecutive count of participants per day (as area)
dfsub |>
mutate(Date = as.Date(Date, format = "%d/%m/%Y")) |>
group_by(Date, Experimenter) |>
summarize(N = n()) |>
ungroup() |>
complete(Date, Experimenter, fill = list(N = 0)) |>
group_by(Experimenter) |>
mutate(N = cumsum(N)) |>
ggplot(aes(x = Date, y = N)) +
geom_area(aes(fill=Experimenter)) +
scale_y_continuous(expand = c(0, 0)) +
labs(
title = "Recruitment History",
x = "Date",
y = "Total Number of Participants"
) +
see::theme_modern()The experiment’s median duration is 26.09 min (50% HDI [20.90, 28.08]).
dfsub |>
mutate(Participant = fct_reorder(Participant, Experiment_Duration),
Category = ifelse(Experiment_Duration > 50, "extra", "ok"),
Duration = ifelse(Experiment_Duration > 50, 50, Experiment_Duration)) |>
ggplot(aes(y = Participant, x = Duration)) +
geom_point(aes(color = Category, shape = Category)) +
geom_vline(xintercept = median(dfsub$Experiment_Duration), color = "red", linetype = "dashed") +
scale_shape_manual(values = c("extra" = 3, ok = 19)) +
scale_color_manual(values = c("extra" = "red", ok = "black")) +
guides(color = "none", shape = "none") +
ggside::geom_xsidedensity(fill = "grey", color=NA) +
ggside::scale_xsidey_continuous(expand = c(0, 0)) +
labs(
title = "Experiment Completion Time",
x = "Duration (in minutes)",
y = "Participants"
) +
see::theme_modern() +
ggside::theme_ggside_void() +
theme(ggside.panel.scale = .3,
axis.text.y = element_blank()) outliers <- c("S043", "S079")
outliers_half <- c("S032", "S049")errorrate <- df |>
group_by(Participant, Illusion_Type, Block) |>
summarize(ErrorRate_per_block = sum(Error) / n()) |>
ungroup() |>
arrange(desc(ErrorRate_per_block))
d_all <- estimate_density(filter(df, RT < 3)$RT)
plot_distribution <- function(dat) {
data_error <- errorrate |>
filter(Participant %in% unique(dat$Participant)) |>
group_by(Participant, Block) |>
summarize(y = mean(ErrorRate_per_block), .groups="drop") |>
mutate(x = ifelse(Block == "A", 2.1, 2.3),
color = case_when(
Participant %in% outliers ~ "red",
Participant %in% outliers_half ~ "orange",
TRUE ~ "blue"
))
dat |>
filter(RT < 3) |>
estimate_density(select = "RT", at = c("Participant", "Block")) |>
group_by(Participant) |>
normalize(select = "y") |>
ungroup() |>
mutate(
Participant = fct_relevel(Participant, sort(unique(dat$Participant))),
color = case_when(
Participant %in% outliers ~ "red",
Participant %in% outliers_half ~ "orange",
TRUE ~ "blue"
)
) |>
ggplot(aes(x = x, y = y)) +
geom_bar(data = data_error, aes(fill = color), stat = "identity", width=0.19) +
geom_segment(aes(x = 2, xend = 2.4, y = 0.5, yend = 0.5), color = "black", linetype="dashed", linewidth = 0.5) +
geom_area(data = normalize(d_all, select = "y"), alpha = 0.2) +
geom_line(aes(color = color, group = interaction(Participant, Block), linetype = Block), linewidth = 0.8) +
# geom_vline(xintercept = 0.125, linetype = "dashed", color = "red", size = 0.5) +
scale_color_manual(values = c("red" = "#F44336", "orange" = "#FF9800", "blue" = "blue"), guide = "none") +
scale_fill_manual(values = c("red" = "#F44336", "orange" = "#FF9800", "blue" = "blue"), guide = "none") +
scale_x_continuous(expand = c(0, 0), breaks=c(0, 0.5, 1, 1.5, 2), labels=c("0", "0.5", "1", "1.5", "2")) +
scale_y_continuous(expand = c(0, 0)) +
coord_cartesian(xlim = c(0, 2.4)) +
theme_modern() +
theme(axis.text.y = element_blank(),
axis.line.y = element_blank(),
strip.text.x = element_text(size = rel(1.5)),
legend.position = "none") +
facet_wrap(~Participant, nrow=10) +
labs(y = "", x = "Reaction Time (s)")
}plot_distribution(df[df$Participant %in% dfsub$Participant[1:100], ])Warning: The `at` argument is deprecated and might be removed in a future update.
Please replace by `by`.
Warning in geom_segment(aes(x = 2, xend = 2.4, y = 0.5, yend = 0.5), color = "black", : All aesthetics have length 1, but the data has 204800 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
plot_distribution(df[df$Participant %in% dfsub$Participant[101:200], ])Warning: The `at` argument is deprecated and might be removed in a future update.
Please replace by `by`.
Warning in geom_segment(aes(x = 2, xend = 2.4, y = 0.5, yend = 0.5), color = "black", : All aesthetics have length 1, but the data has 204800 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
We discarded 2 participants (entirely) and 2 participant’s second blocks.
df <- df |>
filter(!Participant %in% outliers) |>
filter(!((Block == "B") & (Participant %in% outliers_half)))For each block, we computed the error rate and, if more than 50%, we discarded the whole block (as it likely indicates that instructions got mixed up, for instance participants were selecting the smaller instead of the bigger circle).
errorrate |>
estimate_density(at = c("Illusion_Type", "Block"), method = "KernSmooth") |>
ggplot(aes(x = x, y = y)) +
geom_line(aes(color = Illusion_Type, linetype = Block)) +
geom_vline(xintercept = 0.5, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), labels = scales::percent) +
scale_y_continuous(expand = c(0, 0)) +
scale_color_manual(values = c("Ebbinghaus" = "#2196F3", "MullerLyer" = "#4CAF50", "VerticalHorizontal" = "#FF5722")) +
labs(y = "Distribution", x = "Error Rate") +
theme_modern()Warning: The `at` argument is deprecated and might be removed in a future update.
Please replace by `by`.
remove_badblocks <- function(df) {
n <- nrow(df)
df <- df |>
group_by(Participant, Illusion_Type, Block) |>
mutate(ErrorRate_per_block = sum(Error) / n()) |>
ungroup() |>
filter(ErrorRate_per_block < 0.5) |>
select(-ErrorRate_per_block)
text <- paste0(
"We removed ",
n - nrow(df),
" (",
insight::format_value((n - nrow(df)) / n, as_percent = TRUE),
") trials belonging to bad blocks."
)
list(data = df, text = text)
}
out <- remove_badblocks(df)
print(paste("Illusion (session 1):", out$text))[1] "Illusion (session 1): We removed 4240 (1.15%) trials belonging to bad blocks."
df <- out$datacheck_trials <- function(df) {
data <- df |>
mutate(Outlier = ifelse(RT >= 10, TRUE, FALSE)) |>
group_by(Participant) |>
mutate(Outlier = ifelse(RT < 0.150 | standardize(RT, robust = TRUE) > 4, TRUE, Outlier)) |>
ungroup()
p1 <- data |>
filter(RT < 10) |>
estimate_density(select = "RT", at = "Participant") |>
group_by(Participant) |>
normalize(select = "y") |>
ungroup() |>
merge(data |>
group_by(Participant) |>
mutate(Threshold = median(RT) + 4 * mad(RT)) |>
filter(Error == 0) |>
summarize(Threshold = mean(Threshold))) |>
mutate(Outlier = ifelse(x >= Threshold, TRUE, FALSE)) |>
ggplot(aes(x = x, y = y)) +
geom_area(data = normalize(estimate_density(filter(data, RT < 10), select = "RT"), select = "y"), alpha = 0.2) +
geom_line(aes(color = Participant, linetype = Outlier), alpha = 0.2) +
geom_vline(xintercept = c(125), linetype = "dashed", color = "red") +
scale_color_material_d("rainbow", guide = "none") +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
guides(linetype = "none") +
coord_cartesian(xlim = c(0, 5)) +
theme_modern() +
theme(axis.text.y = element_blank()) +
labs(y = "", x = "Reaction Time (s)")
p2 <- data |>
group_by(Participant) |>
summarize(Outlier = sum(Outlier) / n()) |>
mutate(Participant = fct_reorder(Participant, Outlier)) |>
ggplot(aes(x = Participant, y = Outlier)) +
geom_bar(stat = "identity", aes(fill = Participant)) +
scale_fill_material_d("rainbow", guide = "none") +
scale_x_discrete(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0), labels = scales::percent) +
see::theme_modern() +
theme(axis.text.x = element_blank()) +
labs(y = "Percentage of outlier trials")
text <- paste0(
"We removed ",
sum(data$Outlier),
" (",
insight::format_value(sum(data$Outlier) / nrow(data), as_percent = TRUE),
") outlier trials (150 ms < RT < 4 MAD above median)."
)
data <- filter(data, Outlier == FALSE)
data$Outlier <- NULL
list(p = p1 / p2, data = data, text = text)
}out <- check_trials(df)Warning: The `at` argument is deprecated and might be removed in a future update.
Please replace by `by`.
out$text[1] “We removed 20246 (5.58%) outlier trials (150 ms < RT < 4 MAD above median).”
out$pdf <- out$dataoutliers <- dfsub |>
select(IPIP6_SD, IPIP6_Duration, PID5_SD, PID5_Duration, SSS_SD, SSS_Duration) |>
standardize(robust=TRUE) |>
mutate(SD = rowSums(across(ends_with("SD"))), Duration = rowSums(across(ends_with("Duration"))))
table <- dfsub |>
select(Participant, IPIP6_SD, IPIP6_Duration, PID5_SD, PID5_Duration, SSS_SD, SSS_Duration) |>
mutate_at(vars(ends_with("Duration")), \(x) ifelse(x > 10, 10, x)) |>
mutate(Participant = fct_reorder(Participant, desc(outliers$SD))) |>
arrange(Participant)
data.frame(Participant = c("Average"), t(sapply(table[2:ncol(table)], mean, na.rm = TRUE))) |>
rbind(table) |>
gt::gt() |>
gt::fmt_number() |>
gt::data_color(columns = "Participant", fn=\(x) ifelse(x %in% outliers_half, "orange", "white")) |>
gt::data_color(columns = ends_with("Duration"),
direction="column", palette = "RdBu") |>
gt::data_color(columns = ends_with("SD"),
direction="column", palette = "RdBu", reverse=TRUE) |>
gt::data_color(direction="row", rows=1, palette = "grey") |>
gt::opt_interactive(use_compact_mode = TRUE)dfsub <- filter(dfsub, !Participant %in% outliers)
df <- filter(df, Participant %in% dfsub$Participant)p_age <- estimate_density(dfsub$Age) |>
normalize(select = y) |>
mutate(y = y * 86) |> # To match the binwidth
ggplot(aes(x = x)) +
geom_histogram(data=dfsub, aes(x = Age, fill=Gender), bins=28) +
# geom_line(aes(y = y), color = "orange", linewidth=2) +
geom_vline(xintercept = mean(dfsub$Age), color = "red", linewidth=1.5) +
# geom_label(data = data.frame(x = mean(df$Age) * 1.15, y = 0.95 * 75), aes(y = y), color = "red", label = paste0("Mean = ", format_value(mean(df$Age)))) +
scale_fill_manual(values = c("Male"= "#64B5F6", "Female"= "#F06292", "Other"="orange")) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = "Age", y = "Number of Participants", color = NULL, subtitle = "Distribution of participants' age") +
theme_modern(axis.title.space = 10) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)
p_agep_edu <- dfsub |>
mutate(Education = fct_relevel(Education, "Other", "High School", "Bachelor", "Master", "Doctorate")) |>
ggplot(aes(x = Education)) +
geom_bar(aes(fill = Education)) +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
scale_fill_viridis_d(guide = "none") +
labs(title = "Education", y = "Number of Participants", subtitle = "Participants per achieved education level") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)
p_eduggwaffle::waffle_iron(dfsub, ggwaffle::aes_d(group = Ethnicity), rows=10) |>
ggplot(aes(x, y, fill = group)) +
ggwaffle::geom_waffle() +
coord_equal() +
scale_fill_flat_d() +
ggwaffle::theme_waffle() +
labs(title = "Self-reported Ethnicity", subtitle = "Each square represents a participant", fill="") +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2)),
axis.title.x = element_blank(),
axis.title.y = element_blank()
)The final sample includes 768 participants (Mean age = 22.0, SD = 8.4, range: [16, 73]; Gender: 76.4% women, 20.8% men, 2.60% non-binary, 0.13% missing; Education: Bachelor, 19.79%; Doctorate, 1.69%; High School, 70.44%; Master, 2.73%; missing, 0.13%; Other, 5.21%).
write.csv(dfsub, "../data/data_participants.csv", row.names = FALSE)
write.csv(df, "../data/data_IllusionGame.csv", row.names = FALSE)